#load CESHOP example data
#load("../_data/sales_campaign_data.Rdata")
#glimpse(sales_data)

1 CE Shop Example VISUAL Analysis

  • Question about visualizing data
  • Creating Example Charts
  • Showing required (for R & ggplot) Data Manipulation
# Load libraries for data manipulation, plotting, interactivity, and label formatting
# If not installed, run: install.packages(c("tidyverse", "lubridate", "scales", "plotly", "rmdformats"))
library(tidyverse)
library(lubridate)
library(scales)
library(plotly)
library(rmdformats)

1.1 Setup

  1. Create the pseudo-data
  2. Initialize a consistent branded theme (color set)

1.1.1 Create Example Data

# Date window covered by the synthetic dataset (both ends inclusive).
start_date <- as.Date("2022-01-01")
end_date <- as.Date("2025-09-26")

# Course titles that can appear as a product in the dataset.
course_names <- c(
  "75-Hr. PA Sales Pre-Licensing Course Only Package",
  "20-Hour Mortage Broker Education",
  "Appraisers State Exam",
  "Outsmartimg the 20215 Housing Market",
  "Real Estate Introduction",
  "Rental vs Sales - Defining Your Path",
  "Transition from Home Real Estate to Commerical Real Estate"
)

# Promotion labels and, position by position, the sampling weight of each.
promotions <- c("30% Off", "40% Off", "50% Off", "None")
promo_probabilities <- c(0.25, 0.20, 0.15, 0.40)

# Campaign platforms and, position by position, the sampling weight of each.
platforms <- c("Email", "LinkedIn", "Facebook", "Conference", "Direct")
platform_probabilities <- c(0.30, 0.20, 0.20, 0.10, 0.20)

# One entry per calendar day in the window; the record count is five times
# the number of days.
date_sequence <- seq(start_date, end_date, by = "day")
number_of_records <- 5 * length(date_sequence)

# Build the synthetic sales table: one row per simulated sale, with every
# attribute drawn independently at random, then derive the pricing columns.
# NOTE(review): no set.seed() call is visible before this sampling, so the
# generated rows (and every chart built from them) differ from run to run.
sales_data <- tibble(
  Date = sample(date_sequence, size = number_of_records, replace = TRUE),
  Product_Name = sample(course_names, size = number_of_records, replace = TRUE),
  Promotion_Applied = sample(
    promotions,
    size = number_of_records, replace = TRUE, prob = promo_probabilities
  ),
  Professions = sample(
    c("Real Estate Agent", "Mortgage Broker", "Appraiser", "Other"),
    size = number_of_records, replace = TRUE
  ),
  Line_of_Business = sample(
    c("Commerical", "Residential", "Private", "Government", "Other"),
    size = number_of_records, replace = TRUE
  ),
  Traffic_on_Site = sample(100:1000, size = number_of_records, replace = TRUE),
  Status = sample(
    c("Pre-Licensing", "Continuing Education", "Other"),
    size = number_of_records, replace = TRUE, prob = c(.45, .45, .1)
  ),
  Campaign_Platform = sample(
    platforms,
    size = number_of_records, replace = TRUE, prob = platform_probabilities
  ),
  Tactic_Testing = sample(c("A", "B", "C"), size = number_of_records, replace = TRUE)
) %>%
  mutate(
    # Discount as a fraction. The digits are extracted from labels such as
    # "30% Off"; a label without digits ("None") yields NA, which coalesce()
    # turns into a 0 rate. Extracting first avoids the "NAs introduced by
    # coercion" warning that as.numeric("None") would raise.
    Promotion_Rate = coalesce(
      as.numeric(str_extract(Promotion_Applied, "\\d+")) / 100,
      0
    ),
    # Undiscounted list price per course; names not listed here get 0.
    Course_Cost = case_when(
      Product_Name == "75-Hr. PA Sales Pre-Licensing Course Only Package" ~ 7500,
      Product_Name == "20-Hour Mortage Broker Education" ~ 2000,
      Product_Name == "Appraisers State Exam" ~ 750,
      Product_Name == "Outsmartimg the 20215 Housing Market" ~ 1000,
      Product_Name == "Real Estate Introduction" ~ 1500,
      Product_Name == "Rental vs Sales - Defining Your Path" ~ 200,
      Product_Name == "Transition from Home Real Estate to Commerical Real Estate" ~ 750,
      TRUE ~ 0
    ),
    # Discount amount given away, rounded to cents.
    Cost_Yeild = round(Course_Cost * Promotion_Rate, 2),
    # Price paid by the customer. Derived from the rounded discount so that
    # Cost_Yeild + Revenue always equals Course_Cost without floating-point
    # residue.
    Revenue = Course_Cost - Cost_Yeild
  )

1.1.2 Branded Theming for R

# Branded palettes, stored as named character vectors of hex colours.
# Each vector is passed whole to scale_*_manual(values = ...), so the order of
# the entries decides which category receives which colour.
.base_colors <- list(
  primary_blues = c(
    "#005287", "#00354e", "#0278af",
    "#5a5c5d", "#c1c2c4", "#5a5c5d", "#c1c2c4", "#5a5c5d",
    rep("#292323", 2)
  ),
  primary_reds = c(
    "#c42032", # visitech red (or close to it)
    "#850101", # deep, dark red
    "#6D2B2C", # more muted, desaturated red
    "#614041", # reddish-brown / dusky rose
    "#555556", # medium, neutral grey
    rep("#5a5c5d", 3),
    rep("#292323", 3) # very dark grey, close to black
  ),
  # Blue and red entries interleaved, followed by a few extra colours.
  combined_colors = c(
    "#005287", "#c42032",
    "#00354e", "#850101",
    "#0278af", "#6D2B2C",
    "#c1c2c4", "#5a5c5d",
    "#685786", "#ffcc05",
    "#0278af", "#00354e"
  )
)

# Branded ggplot2 theme.
#
# Takes no arguments. Returns theme_minimal() in Arial with the branded
# overrides below added on top: centred bold blue titles, grey body text,
# a white panel with light-grey major gridlines only, and blue facet strips
# with white bold labels. Text sizes are relative (rel()) to the base size.
theme_cta_resize <- function() {
  font <- "Arial"
  brand_blue <- "#005287"
  body_grey <- "#5a5c5d"

  brand_overrides <- theme(
    # Titles
    plot.title = element_text(
      family = font, face = "bold", size = rel(1.25), color = brand_blue,
      hjust = 0.5, margin = margin(b = 10)
    ),
    plot.subtitle = element_text(
      family = font, size = rel(1), color = body_grey,
      hjust = 0.5, margin = margin(b = 10)
    ),
    # Axes
    axis.title = element_text(
      family = font, size = rel(.9), color = brand_blue, face = "bold"
    ),
    axis.text = element_text(
      family = font, size = rel(.75), color = body_grey
    ),
    # Legend
    legend.title = element_text(
      family = font, size = rel(.75), color = brand_blue, face = "bold"
    ),
    legend.text = element_text(
      family = font, size = rel(.65), color = body_grey
    ),
    # Panel and gridlines
    panel.background = element_rect(fill = "#ffffff", color = NA),
    panel.grid.major = element_line(color = "#c1c2c4"),
    panel.grid.minor = element_blank(),
    # Facet strips
    strip.background = element_rect(fill = "#0278af", color = brand_blue),
    strip.text = element_text(
      family = font, size = rel(.9), color = "#ffffff", face = "bold"
    )
  )

  theme_minimal(base_family = font) + brand_overrides
}

2 Visual Analysis Section

  • Analysis based on described data from The CE Shop
  • Created by Colin T. Annand

2.1 Charts

2.1.1 Monthly Revenue Over Time (Interactive Line Chart) —

# Prepare data: total revenue per calendar month.
# floor_date() maps every sale date to the first day of its month, so the
# grouping key is still a Date and works with scale_x_date() below.
monthly_revenue <- sales_data %>%
  mutate(Month = floor_date(Date, "month")) %>%
  group_by(Month) %>%
  summarise(Total_Revenue = sum(Revenue), .groups = "drop")

# Create ggplot object
g1 <- monthly_revenue %>%
  ggplot(aes(
    x = Month,
    y = Total_Revenue,
    # The tooltip `text` is a discrete aesthetic with a different value on
    # every row. Without an explicit group, ggplot2 places each month in its
    # own group and geom_line() has nothing to connect, so no line is drawn.
    group = 1,
    # `text` is only consumed by ggplotly() as the hover tooltip.
    text = paste(
      "Month: ", format(Month, "%b %Y"),
      "<br>Revenue: ", dollar(Total_Revenue)
    )
  )) +
  # `linewidth` replaces the deprecated `size` argument for line geoms
  # (ggplot2 >= 3.4.0).
  geom_line(color = .base_colors$primary_blues[1], linewidth = 1) +
  geom_point(color = .base_colors$primary_blues[1], size = 2) +
  labs(
    title = "Monthly Revenue Over Time",
    subtitle = "Total sales revenue aggregated by month",
    x = "Month",
    y = "Total Revenue"
  ) +
  scale_y_continuous(labels = dollar_format()) +
  scale_x_date(date_breaks = "6 months", date_labels = "%b %Y") +
  theme_cta_resize()

# Convert to an interactive plotly chart that shows only the custom tooltip.
# To view, simply type 'p1' in the R console
p1 <- ggplotly(g1, tooltip = "text")

# Print the static version
print(g1)

2.1.2 Total Revenue by Product (Horizontal Bar Chart) —

# Horizontal bar chart of total revenue per course, largest bar at the top.
g2 <- sales_data %>%
  # Sum Revenue within each product: one row per course
  count(Product_Name, wt = Revenue, name = "Total_Revenue") %>%
  ggplot(aes(
    # Order the courses by their revenue so the bars appear sorted
    y = reorder(Product_Name, Total_Revenue),
    x = Total_Revenue,
    fill = Product_Name
  )) +
  geom_col() +
  scale_fill_manual(values = .base_colors$primary_blues) +
  scale_x_continuous(labels = dollar_format()) +
  labs(
    x = "Total Revenue",
    y = "Product",
    title = "Total Revenue by Product",
    subtitle = "Revenue generated from each course"
  ) +
  theme_cta_resize() +
  # The y-axis already names every course, so the fill legend is dropped
  theme(legend.position = "none")

# Render the static chart
print(g2)

2.1.3 Sales Volume by Campaign Platform (Bar Chart) —

# Vertical bar chart of the number of sales per campaign platform,
# tallest bar first, with the count printed above each bar.
g3 <- sales_data %>%
  # One row per platform; n is the number of sales rows for that platform
  group_by(Campaign_Platform) %>%
  summarise(n = n(), .groups = "drop") %>%
  ggplot(aes(
    # Negating n orders the platforms from most to fewest sales
    x = reorder(Campaign_Platform, -n),
    y = n,
    fill = Campaign_Platform
  )) +
  geom_col() +
  # Count label placed just above the top of each bar
  geom_text(
    aes(label = comma(n)),
    color = "#00354e",
    size = 3.5,
    vjust = -0.5
  ) +
  scale_fill_manual(values = .base_colors$primary_reds) +
  scale_y_continuous(labels = comma_format()) +
  labs(
    x = "Campaign Platform",
    y = "Number of Sales",
    title = "Sales Volume by Campaign Platform",
    subtitle = "Total number of sales attributed to each platform"
  ) +
  theme_cta_resize() +
  theme(legend.position = "none")

# Render the static chart
print(g3)

# Grouped (dodged) bar chart: number of sales per campaign platform, split by
# the promotion applied, with the count printed above each bar.
g3a <- sales_data %>%
  # One row per platform/promotion pair. count() returns ungrouped rows
  # already sorted by both keys, so no separate arrange() step is needed.
  count(Campaign_Platform, Promotion_Applied, name = "Total_Sales") %>%
  ggplot(aes(x = Campaign_Platform, y = Total_Sales, fill = Promotion_Applied)) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_text(
    aes(label = comma(Total_Sales)),
    vjust = -0.5,
    color = "#00354e",
    size = 3.5,
    # Same dodge width as the bars so every label sits above its own bar
    position = position_dodge(width = 0.9)
  ) +
  labs(
    # Title and subtitle name the promotion split so this chart is not
    # mistaken for the platform-only chart that uses the same axes.
    title = "Sales Volume by Campaign Platform and Promotion",
    subtitle = "Number of sales on each platform, split by promotion applied",
    x = "Campaign Platform",
    y = "Number of Sales",
    fill = "Promotion Applied"
  ) +
  scale_y_continuous(labels = comma_format()) +
  scale_fill_manual(values = .base_colors$primary_reds) +
  # The legend is kept: it is the only key to the promotion colours.
  theme_cta_resize()

# Print the static chart
print(g3a)

#### Notes: - Of particular importance here, is the

2.1.4 Revenue by Product and Business Status (Faceted Bar Chart) —

# Faceted horizontal bar chart: total revenue per course, one panel per
# customer Status.
g4 <- sales_data %>%
  # Wrap long course names so the y-axis labels fit next to the panels
  mutate(Product_Name = str_wrap(Product_Name, width = 25)) %>%
  group_by(Product_Name, Status) %>%
  # .groups = "drop" returns an ungrouped table; without it summarise() keeps
  # the Product_Name grouping and prints a "grouped output" message on every
  # run.
  summarise(Total_Revenue = sum(Revenue), .groups = "drop") %>%
  # reorder() sorts the courses by their mean revenue across the Status rows,
  # so every panel shares the same course order.
  ggplot(aes(x = Total_Revenue, y = reorder(Product_Name, Total_Revenue), fill = Status)) +
  geom_col() +
  # Create separate charts for each "Status"
  facet_wrap(~Status) +
  labs(
    title = "Revenue by Product, Segmented by Business Status",
    subtitle = "Comparing product performance across different customer segments",
    x = "Total Revenue",
    y = "Product"
  ) +
  # Show the axis in thousands of dollars, e.g. 250000 is labelled "$250K"
  scale_x_continuous(labels = dollar_format(scale = .001, suffix = "K")) +
  scale_fill_manual(values = .base_colors$primary_blues) +
  theme_cta_resize() +
  theme(
    legend.position = "none", # Facet titles make legend redundant
    axis.text.x = element_text(angle = 45, hjust = 1) # Angle text for readability
  )

# Print the static chart
print(g4)

2.1.5 Daily Revenue vs. Daily Traffic (Interactive Scatter Plot) —

# Prepare data: one row per calendar day with that day's summed revenue and
# summed site traffic.
daily_data <- sales_data %>%
  group_by(Date) %>%
  summarise(
    Total_Revenue = sum(Revenue),
    Total_Traffic = sum(Traffic_on_Site),
    .groups = "drop"
  )

# Create ggplot object
g5 <- daily_data %>%
  ggplot(aes(
    x = Total_Traffic,
    y = Total_Revenue,
    # `text` is only consumed by ggplotly() as the hover tooltip
    text = paste(
      "Date: ", Date,
      "<br>Traffic: ", comma(Total_Traffic),
      "<br>Revenue: ", dollar(Total_Revenue)
    )
  )) +
  geom_point(color = .base_colors$primary_blues[3], alpha = 0.6) +
  labs(
    title = "Daily Revenue vs. Daily Traffic",
    subtitle = "Each point represents one day",
    x = "Total Daily Traffic on Site",
    y = "Total Daily Revenue"
  ) +
  scale_x_continuous(labels = comma_format()) +
  scale_y_continuous(labels = dollar_format()) +
  theme_cta_resize()

# Convert to an interactive plotly chart that shows only the custom tooltip.
p5 <- ggplotly(g5, tooltip = "text")

# Display the interactive chart built above. Calling ggplotly(g5) again here
# would convert the plot a second time without tooltip = "text" and discard
# the formatted hover text.
p5

2.1.6 Daily Revenue by Promotion - Past Week (Lollipop Chart) —

# Prepare data: revenue and traffic per weekday and promotion for the most
# recent 7 calendar days in the data.
# The object keeps its historical name `last_5_days_data`, and the `Date`
# column keeps holding the weekday number, in case other code refers to them.
last_5_days_data <- sales_data %>%
  # Strict ">" keeps exactly 7 calendar days (max - 6 ... max), so each
  # weekday is represented by a single date. ">=" would keep 8 days and count
  # one weekday twice.
  filter(Date > max(Date) - days(7)) %>%
  # week_start = 7 pins the numbering to 1 = Sunday ... 7 = Saturday, which is
  # what the labels below assume, regardless of the lubridate.week.start
  # option.
  group_by(Date = wday(Date, week_start = 7), Promotion_Applied) %>%
  summarise(
    Total_Revenue = sum(Revenue),
    Total_Traffic = sum(Traffic_on_Site),
    .groups = "drop"
  ) %>%
  mutate(
    # Explicit English labels keep the facet titles independent of the locale
    Date_Factor = factor(
      Date,
      levels = 1:7,
      labels = c(
        "Sunday", "Monday", "Tuesday", "Wednesday",
        "Thursday", "Friday", "Saturday"
      )
    )
  )

# Create plot: one lollipop per promotion, one panel per weekday
g6 <- last_5_days_data %>%
  ggplot(aes(x = Promotion_Applied, y = Total_Revenue, color = Promotion_Applied)) +
  # The "stick" of the lollipop: a segment from 0 up to the revenue value.
  # geom_vline() has no y/yend aesthetics and would draw full-height lines.
  geom_segment(
    aes(xend = Promotion_Applied, y = 0, yend = Total_Revenue),
    linewidth = 0.75
  ) +
  # The "candy" of the lollipop
  geom_point(size = 4) +
  labs(
    title = "Daily Revenue by Promotion: Last 7 Days",
    subtitle = "A detailed look at recent sales performance",
    x = "Promotion Applied",
    y = "Total Revenue",
    color = "Promotion Applied"
  ) +
  scale_y_continuous(labels = dollar_format()) +
  scale_color_manual(values = .base_colors$combined_colors) +
  facet_wrap(~Date_Factor) +
  theme_cta_resize() +
  theme(
    panel.grid.major.x = element_blank() # Remove vertical gridlines
  )

# Display the chart as an interactive plotly widget
ggplotly(g6)